In [1]:

    
import pandas as pd
import numpy as np

# Six consecutive calendar days beginning 2016-07-01 serve as the row index.
starting_date = '20160701'
dates_index = pd.date_range(starting_date, periods=6)

# The integers 0..23 laid out row by row as 6 rows x 4 columns.
sample_numpy_data = np.arange(24).reshape(6, 4)
sample_df = pd.DataFrame(
    data=sample_numpy_data,
    index=dates_index,
    columns=list('ABCD'),
)

# Extend a copy so that sample_df itself keeps only columns A-D.
sample_df_2 = sample_df.copy()
sample_df_2['Fruits'] = [
    'apple', 'orange', 'banana', 'strawberry', 'blueberry', 'pineapple',
]

# A date-indexed Series is matched to the frame's rows by index label on assignment.
sample_series = pd.Series(
    [1, 2, 3, 4, 5, 6],
    index=pd.date_range(starting_date, periods=6),
)
sample_df_2['Extra Data'] = 1 + 3 * sample_series

# A plain NumPy array has no index, so it is assigned row by row in order.
second_numpy_array = 7 + 100 * np.arange(len(sample_df_2))
sample_df_2['G'] = second_numpy_array

sample_df_2

descriptive statistics



In [ ]:

    
# Render floats with two decimal places in displayed tables. This is a
# session-wide pandas display setting; the underlying data is not rounded.
pd.options.display.precision = 2

# Summary statistics for sample_df_2. With mixed dtypes, describe() reports
# on the numeric columns by default.
sample_df_2.describe()

column mean (the mean of each column)



In [ ]:

row mean (the mean of each row)

documentation: https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.mean.html



In [ ]:

apply (a function to a data frame)

documentation: https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html



In [ ]:

string methods

documentation: https://pandas.pydata.org/docs/user_guide/text.html



In [ ]:

    
# Mixed-case strings plus one missing value (NaN) as input for the .str accessor.
raw_strings = ['A', 'B', 'C', 'Aaba', 'Baca', np.nan, 'CABA', 'dog', 'cat']
s = pd.Series(raw_strings)

# Lower-case every string; the missing entry stays missing.
s.str.lower()



In [ ]:

	A	B	C	D	Fruits	Extra Data	G
2016-07-01	0	1	2	3	apple	4	7
2016-07-02	4	5	6	7	orange	7	107
2016-07-03	8	9	10	11	banana	10	207
2016-07-04	12	13	14	15	strawberry	13	307
2016-07-05	16	17	18	19	blueberry	16	407
2016-07-06	20	21	22	23	pineapple	19	507